import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the German credit dataset from a local CSV file and preview the
# first five rows.
csv_path = r"C:\Users\jki\Downloads\german_credit_data.csv"
german_credit_df = pd.read_csv(csv_path)
german_credit_df.head(n=5)
Unnamed: 0 | Age | Sex | Job | Housing | Saving accounts | Checking account | Credit amount | Duration | Purpose | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 67 | male | 2 | own | NaN | little | 1169 | 6 | radio/TV |
1 | 1 | 22 | female | 2 | own | little | moderate | 5951 | 48 | radio/TV |
2 | 2 | 49 | male | 1 | own | little | NaN | 2096 | 12 | education |
3 | 3 | 45 | male | 2 | free | little | little | 7882 | 42 | furniture/equipment |
4 | 4 | 53 | male | 2 | free | little | little | 4870 | 24 | car |
# Show the column labels of the loaded frame.
column_labels = german_credit_df.columns
print(column_labels)
Index(['Unnamed: 0', 'Age', 'Sex', 'Job', 'Housing', 'Saving accounts', 'Checking account', 'Credit amount', 'Duration', 'Purpose'], dtype='object')
# Print the distinct values of each text-valued column so the encodings
# applied later can be checked against them.
for column in ("Purpose", "Sex", "Housing", "Saving accounts", "Checking account"):
    print(f"{column} : ", german_credit_df[column].unique())
Purpose : ['radio/TV' 'education' 'furniture/equipment' 'car' 'business' 'domestic appliances' 'repairs' 'vacation/others'] Sex : ['male' 'female'] Housing : ['own' 'free' 'rent'] Saving accounts : [nan 'little' 'quite rich' 'rich' 'moderate'] Checking account : ['little' 'moderate' nan 'rich']
# Replace every text-valued column with a numeric code, in place.

# Account-level columns contain missing values: map the known levels to
# increasing integers, then fill the gaps with the mean of the observed codes.
account_level_codes = {
    'Saving accounts': {"little": 0, "moderate": 1, "quite rich": 2, "rich": 3},
    'Checking account': {"little": 0, "moderate": 1, "rich": 2},
}
for column, codes in account_level_codes.items():
    encoded = german_credit_df[column].map(codes)
    # Series.mean() skips NaN by default, so this is the mean of observed codes.
    german_credit_df[column] = encoded.fillna(encoded.mean())

# Remaining text columns: map each label to an integer code stored as float.
# NOTE(review): these codes impose an arbitrary order on unordered labels
# (e.g. Purpose); confirm that is acceptable for distance-based models.
label_codes = {
    'Sex': {"male": 0, "female": 1},
    'Housing': {"own": 0, "free": 1, "rent": 2},
    'Purpose': {
        'radio/TV': 0,
        'education': 1,
        'furniture/equipment': 2,
        'car': 3,
        'business': 4,
        'domestic appliances': 5,
        'repairs': 6,
        'vacation/others': 7,
    },
}
for column, codes in label_codes.items():
    german_credit_df[column] = german_credit_df[column].map(codes).astype(float)

german_credit_df.head(10)
Unnamed: 0 | Age | Sex | Job | Housing | Saving accounts | Checking account | Credit amount | Duration | Purpose | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 67 | 0.0 | 2 | 0.0 | 0.456548 | 0.000000 | 1169 | 6 | 0.0 |
1 | 1 | 22 | 1.0 | 2 | 0.0 | 0.000000 | 1.000000 | 5951 | 48 | 0.0 |
2 | 2 | 49 | 0.0 | 1 | 0.0 | 0.000000 | 0.651815 | 2096 | 12 | 1.0 |
3 | 3 | 45 | 0.0 | 2 | 1.0 | 0.000000 | 0.000000 | 7882 | 42 | 2.0 |
4 | 4 | 53 | 0.0 | 2 | 1.0 | 0.000000 | 0.000000 | 4870 | 24 | 3.0 |
5 | 5 | 35 | 0.0 | 1 | 1.0 | 0.456548 | 0.651815 | 9055 | 36 | 1.0 |
6 | 6 | 53 | 0.0 | 2 | 0.0 | 2.000000 | 0.651815 | 2835 | 24 | 2.0 |
7 | 7 | 35 | 0.0 | 3 | 2.0 | 0.000000 | 1.000000 | 6948 | 36 | 3.0 |
8 | 8 | 61 | 0.0 | 1 | 0.0 | 3.000000 | 0.651815 | 3059 | 12 | 0.0 |
9 | 9 | 28 | 0.0 | 3 | 0.0 | 0.000000 | 1.000000 | 5234 | 30 | 3.0 |
# Scatter plot of credit amount (x) against applicant age (y).
# The figure is opened BEFORE plotting: calling plt.figure() after the scatter
# only creates an extra, empty figure ("Figure ... with 0 Axes").
plt.figure()
plt.scatter(german_credit_df['Credit amount'], german_credit_df["Age"])
plt.xlabel('Credit amount')
plt.ylabel('Age')
plt.show()
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
# Pairwise relationship grid over the dataset's columns.
# 'Unnamed: 0' appears to be the row index saved into the CSV (it mirrors the
# index in the preview), so it is excluded rather than plotted as a feature.
# errors='ignore' keeps this cell working if that column is absent.
sns.pairplot(german_credit_df.drop(columns=['Unnamed: 0'], errors='ignore'))
<seaborn.axisgrid.PairGrid at 0x1b118e61650>
# Scatter plot of credit amount (x) against loan duration (y).
# The figure is opened BEFORE plotting: calling plt.figure() after the scatter
# only creates an extra, empty figure ("Figure ... with 0 Axes").
plt.figure()
plt.scatter(german_credit_df['Credit amount'], german_credit_df["Duration"])
plt.xlabel('Credit amount')
plt.ylabel('Duration')
plt.show()
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
# Scatter plot of the saving-accounts code (x) against loan duration (y).
# The figure is opened BEFORE plotting: calling plt.figure() after the scatter
# only creates an extra, empty figure ("Figure ... with 0 Axes").
plt.figure()
plt.scatter(german_credit_df['Saving accounts'], german_credit_df["Duration"])
plt.xlabel('Saving accounts')
plt.ylabel('Duration')
plt.show()
<Figure size 640x480 with 0 Axes>
<Figure size 640x480 with 0 Axes>
# Histogram of loan purpose, one bar per purpose code (0-7).
# Labels are listed in code order, matching the Purpose mapping used when the
# column was encoded.
purpose_labels = ['Radio', 'education', 'furniture', 'car', 'business',
                  'appliances', 'repairs', 'vacation']

# Bin edges at -0.5, 0.5, ..., 7.5 centre every bar on its integer code.
# With bins=8 the bins are only 0.875 wide, so bars drift away from labels
# placed at integer x positions.
purpose_bin_edges = [code - 0.5 for code in range(len(purpose_labels) + 1)]

# Series.hist returns a matplotlib Axes (not a Figure), hence the name `ax`.
ax = german_credit_df["Purpose"].hist(bins=purpose_bin_edges)

# Real tick labels / axis label instead of free-floating text at hard-coded
# data coordinates, which breaks as soon as the bar heights change.
ax.set_xticks(range(len(purpose_labels)))
ax.set_xticklabels(purpose_labels, rotation=45, ha='right')
ax.set_ylabel('Frequency')
plt.show()
Text(7, -50, 'vacation')
# Restrict to loans with 2000 < credit amount <= 5000 and plot their purposes.
# Both bounds are applied in one mask. Previously the lower bound was assigned
# to a misspelled variable ("imitedCredit"), so it never affected
# limitedCredit and the plot showed every loan up to 5000.
credit_amount = german_credit_df["Credit amount"]
limitedCredit = german_credit_df[(credit_amount > 2000) & (credit_amount <= 5000)]

# Labels are listed in code order, matching the Purpose mapping used when the
# column was encoded.
purpose_labels = ['Radio', 'education', 'furniture', 'car', 'business',
                  'appliances', 'repairs', 'vacation']

# Bin edges at -0.5, 0.5, ..., 7.5 centre every bar on its integer code
# (bins=8 would give 0.875-wide bins that drift away from the labels).
purpose_bin_edges = [code - 0.5 for code in range(len(purpose_labels) + 1)]

# Series.hist returns a matplotlib Axes (not a Figure), hence the name `ax`.
ax = limitedCredit["Purpose"].hist(bins=purpose_bin_edges)
ax.set_xticks(range(len(purpose_labels)))
ax.set_xticklabels(purpose_labels, rotation=45, ha='right')
ax.set_ylabel('Frequency')
plt.show()
Text(7, -50, 'vacation')
# Histogram of applicant age using 60 bins.
# Series.hist returns a matplotlib Axes (not a Figure), hence the name `ax`.
ax = german_credit_df["Age"].hist(bins=60)

# Proper axis labels instead of text placed at hard-coded data coordinates,
# which can fall outside the plotted range.
ax.set_xlabel('Age')
ax.set_ylabel('Frequency')
plt.show()
Text(0, 40, 'Frequency')
# Histogram of the Job code, one bar per code (0-3).
# Labels are listed in code order, as positioned in the original plot.
job_labels = ['UnSkilled', 'UnSkilled Resident', 'Skilled', 'Highly Skilled']

# Bin edges at -0.5, 0.5, ..., 3.5 centre every bar on its integer code; the
# default of 10 equal-width bins does not line up with labels at 0, 1, 2, 3.
job_bin_edges = [code - 0.5 for code in range(len(job_labels) + 1)]

# Series.hist returns a matplotlib Axes (not a Figure), hence the name `ax`.
ax = german_credit_df["Job"].hist(bins=job_bin_edges)

# Real tick labels / axis label instead of text at hard-coded coordinates.
ax.set_xticks(range(len(job_labels)))
ax.set_xticklabels(job_labels)
ax.set_ylabel('Frequency')
plt.show()
Text(3, -100, 'Highly Skilled')
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize

# Cluster the encoded records with k-means and project them to 2-D with PCA
# for plotting.
#   y      - cluster label per row (KMeans default number of clusters)
#   X_norm - feature matrix after sklearn's `normalize`
#   y_PCA  - (n_rows, 2) array of the first two principal components of X_norm

# 'Unnamed: 0' appears to be the row index saved into the CSV; as a feature it
# would make rows look similar just because they are adjacent in the file.
# errors='ignore' keeps this cell working if that column is absent.
features = german_credit_df.drop(columns=["Unnamed: 0"], errors="ignore")

# n_init=10 pins the value that was previously implicit (and silences the
# FutureWarning about the default changing); random_state makes the cluster
# labels reproducible between runs.
# NOTE(review): k-means runs on the raw features, so large-valued columns such
# as 'Credit amount' dominate the distances - confirm this is intended.
y = KMeans(n_init=10, random_state=0).fit_predict(features)

# NOTE(review): `normalize` rescales each ROW to unit norm by default; it does
# not standardise columns. Confirm this is the intended preprocessing for PCA.
X_norm = normalize(features)

# fit_transform's second positional argument is `y`, which PCA ignores; the
# stray `2` that used to be passed there has been removed.
y_PCA = PCA(n_components=2).fit_transform(X_norm)
y_PCA.shape
C:\Users\jki\anaconda3\Lib\site-packages\sklearn\cluster\_kmeans.py:1412: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning super()._check_params_vs_input(X, default_n_init=10)
(1000, 2)
# Two views of the k-means result, both coloured by the cluster labels in `y`.

# View 1: original feature space, credit amount (x) against age (y).
credit_amount = german_credit_df['Credit amount']
applicant_age = german_credit_df['Age']
plt.scatter(credit_amount, applicant_age, c=y)

# Open a second figure so the PCA view does not draw over the first plot.
plt.figure()

# View 2: the first two principal components.
first_component = y_PCA[:, 0]
second_component = y_PCA[:, 1]
plt.scatter(first_component, second_component, c=y)
<matplotlib.collections.PathCollection at 0x1b1247c90d0>